# Preview the first five rows of the ASA section data.
head(asasec, n = 5)
## Section Sname Beginning Revenues
## 1 Aging and the Life Course (018) Aging 12752 12104
## 2 Alcohol, Drugs and Tobacco (030) Alcohol/Drugs 11933 1144
## 3 Altruism and Social Solidarity (047) Altruism 1139 1862
## 4 Animals and Society (042) Animals 473 820
## 5 Asia/Asian America (024) Asia 9056 2116
## Expenses Ending Journal Year Members
## 1 12007 12849 No 2005 598
## 2 400 12677 No 2005 301
## 3 1875 1126 No 2005 NA
## 4 1116 177 No 2005 209
## 5 1710 9462 No 2005 365
Figure 8.1: Back to basics
# Base plot for the 2014 sections: membership on x, revenues on y. The short
# section name is mapped to `label` so that text layers can pick it up.
p <- ggplot(
  subset(asasec, Year == 2014),
  aes(Members, Revenues, label = Sname)
)

# Scatterplot with geom_smooth() left at its default smoothing method.
p +
  geom_point() +
  geom_smooth()
Figure 8.2: Refining the plot
# Same 2014 base plot as before. The points are now colored by a third
# variable (Journal), the smoother is a linear fit (method = "lm"), and the
# legend is moved above the panel.
p <- ggplot(
  subset(asasec, Year == 2014),
  aes(Members, Revenues, label = Sname)
)

p +
  geom_point(aes(color = Journal)) +
  geom_smooth(method = "lm") +
  theme(legend.position = "top")
Figure 8.3: Refining the axes
# The plot is assembled in stages; each p* object adds to the previous one.

# p0: 2014 sections, membership vs. revenues, labelled by short section name.
p0 <- ggplot(
  subset(asasec, Year == 2014),
  aes(Members, Revenues, label = Sname)
)

# p1: light gray linear fit without its confidence band, then the points
# colored by Journal (added after the line, so they are drawn on top of it).
p1 <- p0 +
  geom_smooth(method = "lm", se = FALSE, color = "gray80") +
  geom_point(aes(color = Journal))

# p2: text labels only for the 2014 sections with revenues above 7000.
p2 <- p1 +
  geom_text_repel(
    data = subset(asasec, Year == 2014 & Revenues > 7000),
    size = 2
  )

# p3: axis, legend, title, subtitle and caption text.
p3 <- p2 +
  labs(
    x = "Membership",
    y = "Revenues",
    color = "Section has own Journal",
    title = "ASA Sections",
    subtitle = "2014 Calendar Year.",
    caption = "Source: ASA annual report."
  )

# p4: dollar-formatted y axis, black-and-white theme, legend at the bottom.
# theme() comes after theme_bw() so the legend position is not reset.
p4 <- p3 +
  scale_y_continuous(labels = scales::dollar) +
  theme_bw() +
  theme(legend.position = "bottom")

p4
You should choose a color palette based on its ability to express the data you are plotting.
Do not map sequential scales to categorical palettes, or use a diverging palette for a variable with no well-defined midpoint.
We choose color palettes for mappings through one of the scale_ functions for color or fill.
RColorBrewer should be used to access color palettes.
Figure 8.7.1: Color palette Set2
# roads vs. donors, colored by `world`; rows with a missing `world` value are
# dropped before plotting.
p <- ggplot(
  organdata %>% drop_na(world),
  aes(roads, donors, color = world)
)

# ColorBrewer "Set2" palette for the color scale, legend above the panel.
p +
  geom_point(size = 2) +
  scale_color_brewer(palette = "Set2") +
  theme(legend.position = "top")
Figure 8.7.2: Color palette Pastel2
# roads vs. donors, colored by `world`; rows with a missing `world` value are
# dropped before plotting.
p <- ggplot(
  organdata %>% drop_na(world),
  aes(roads, donors, color = world)
)

# ColorBrewer "Pastel2" palette for the color scale, legend above the panel.
p +
  geom_point(size = 2) +
  scale_color_brewer(palette = "Pastel2") +
  theme(legend.position = "top")
Figure 8.7.3: Color palette Dark2
# roads vs. donors, colored by `world`; rows with a missing `world` value are
# dropped before plotting.
p <- ggplot(
  organdata %>% drop_na(world),
  aes(roads, donors, color = world)
)

# ColorBrewer "Dark2" palette for the color scale, legend above the panel.
p +
  geom_point(size = 2) +
  scale_color_brewer(palette = "Dark2") +
  theme(legend.position = "top")
Colors can also be specified manually, via scale_color_manual() or scale_fill_manual(). These functions take a values argument that can be specified as a vector of color names or color values that R recognizes.
Alternatively to color names, color values can be specified via their hexadecimal RGB value.
Figure 8.8: Color blind friendly palette
# Eight colors given as hexadecimal RGB strings.
cb_palette <- c(
  "#999999", "#E69F00", "#56B4E9", "#009E73",
  "#F0E442", "#0072B2", "#D55E00", "#CC79A7"
)

# Use the palette as the manual color scale for p4.
p4 +
  scale_color_manual(values = cb_palette)
Figure 8.10: The background layer
# Two-color palette (a blue and a red) for the party color scale that is
# added on top of this background layer.
party_colors <- c("#2E74C0", "#CB454A")

# Background layer: only the counties that did not flip.
# NOTE(review): `black` is divided by 100, presumably because it is stored as
# a percentage on a 0-100 scale -- confirm against the county_data codebook.
p0 <- ggplot(
  data = subset(county_data, flipped == "No"),
  mapping = aes(x = pop, y = black / 100)
)

# Faint gray points on a log10 x axis with comma-separated tick labels.
# The argument is spelled `labels` in full: the previous `label =` only worked
# through R's partial argument matching.
p1 <- p0 +
  geom_point(alpha = 0.15, color = "gray50") +
  scale_x_log10(labels = scales::comma)

p1
Figure 8.11: Using a second layer that is a complement of the first
# Second layer: the complement of the background data, i.e. the counties that
# did flip, colored by partywinner16 using the manual party palette.
p2 <- p1 +
  geom_point(
    aes(color = partywinner16),
    data = subset(county_data, flipped == "Yes")
  ) +
  scale_color_manual(values = party_colors)

p2
Figure 8.12: Adding guides and labels, and fixing the y scale
# Format the y axis as percentages and set the legend title, axis titles,
# plot title and caption.
p3 <- p2 +
  scale_y_continuous(labels = scales::percent) +
  labs(
    color = "County flipped to ...",
    x = "County Population (log scale)",
    y = "Percent Black Population",
    title = "Flipped counties, 2016",
    caption = "Counties in gray did not flip."
  )

# Display with the black-and-white theme; p3 itself is stored without it.
p3 +
  theme_bw()
Figure 8.13: County-level election data from 2016
# Label the flipped counties whose `black` value is above 25 with their state.
# The text layer has its own data, so x, y and label are mapped explicitly.
p4 <- p3 +
  theme_bw() +
  geom_text_repel(
    data = subset(county_data, flipped == "Yes" & black > 25),
    mapping = aes(pop, black / 100, label = state),
    size = 2
  )

# Display with the minimal theme and the legend on top. theme_minimal() is a
# complete theme, so it replaces the theme_bw() stored in p4 for this display.
p4 +
  theme_minimal() +
  theme(legend.position = "top")
Themes can be turned on or off using the theme_set() function. It takes the name of a theme as an argument.
Theme functions are a set of detailed instructions to turn on, turn off, or modify a large number of graphical elements on the plot.
Once set, a theme applies to all subsequent plots, and it remains active until it is replaced by a different theme.
Figure 8.14: Economist and WSJ theme
library(ggthemes)
# Economist theme: make it the active default theme, then draw p4 with it and
# the legend on top.
theme_set(theme_economist())
p4 +
  theme_economist() +
  theme(legend.position = "top")

# WSJ theme: make it the active default theme, then draw p4 with it. The
# title, legend title and caption are shrunk relative to their inherited
# sizes, and the legend is placed on top.
theme_set(theme_wsj())
p4 +
  theme_wsj() +
  theme(
    plot.title = element_text(size = rel(0.6)),
    legend.title = element_text(size = rel(0.35)),
    plot.caption = element_text(size = rel(0.35)),
    legend.position = "top"
  )
Figure 8.16: A customized small multiple
Figure 8.17: Ridgeplot version of the age distribution plot
library(ggridges)
# Make the minimal theme the active default theme.
theme_set(theme_minimal())

# Age on x, one ridge per survey year on y. The year is turned into an ordered
# factor whose levels are the reverse of the order in which the years appear
# in the data.
p <- ggplot(
  gss_lon,
  aes(
    x = age,
    y = factor(year, levels = rev(unique(year)), ordered = TRUE)
  )
)

# Semi-transparent light blue ridges (scale = 1.5), three breaks on the age
# axis, extra space at both ends of the y axis, and bold size-14 title text.
p +
  geom_density_ridges(alpha = 0.6, fill = "lightblue", scale = 1.5) +
  scale_x_continuous(breaks = c(25, 50, 75)) +
  scale_y_discrete(expand = c(0, 4)) +
  labs(
    x = "Age",
    y = NULL,
    title = "Age distribution of\nGSS respondents"
  ) +
  theme(title = element_text(size = 14, face = "bold"))
The first value passed to expand is a multiplicative expansion: it is multiplied by the range of the axis limits. The second value is an absolute expansion that is added to both ends of the axis.
The scale argument in geom_density_ridges controls the degree to which the plots overlap.
Most of the time when people draw plots with two y-axes they want to line the series up as closely as possible because they suspect that there is a substantive association between them.
The main problem with using two y-axes is that it makes it even easier than usual to fool yourself (or someone else) about the degree of association between the variables, because you can adjust the scaling of the axes relative to one another in a way that moves the data series around.
Index numbers can have complications of their own, but they allow us to use one axis instead of two, and to calculate a sensible difference between the two series and plot that.
Figure 8.20: Indexed series with a running difference below, using separate plots
# Reshape the two indexed series to long format: one row per date and series,
# with the series name in `series` and its value in `score`.
# pivot_longer() replaces the superseded gather(). The rows come out ordered
# by date rather than stacked series by series, which does not affect a line
# plot grouped by `series`.
fredts_m <- fredts %>%
  select(date, sp500_i, monbase_i) %>%
  pivot_longer(
    cols = sp500_i:monbase_i,
    names_to = "series",
    values_to = "score"
  )
# Upper panel: both series from the long-format data drawn against date, one
# line per series, distinguished by color.
p <- ggplot(
  fredts_m,
  aes(date, score, group = series, color = series)
)

p1 <- p +
  geom_line() +
  theme(legend.position = "top") +
  labs(
    x = "Date",
    y = "Index",
    color = "Series"
  )
# Lower panel: the difference between the two series, computed inside aes()
# from the wide-format data.
p <- ggplot(
  fredts,
  aes(date, sp500_i - monbase_i)
)

p2 <- p +
  geom_line() +
  labs(
    x = "Date",
    y = "Difference"
  )

# Stack the two panels vertically, aligned with align = "v"; the upper panel
# gets three quarters of the height.
cowplot::plot_grid(
  p1, p2,
  nrow = 2,
  rel_heights = c(0.75, 0.25),
  align = "v"
)